*This code produces binscatter plots for projection of data and predictions on IV
*It uses as input the txt files produced at estimation_partest.m
*Corresponding author: Rodrigo Adao
*Date: 09/11/2024

clear all
global data_path  "..\..\Output\Estimation"
global graph_path "..\..\Output\Figures"

cd "$data_path"

*Load data and save dta file
******************************************************************************************
*Predictions/IV based on FGKK estimates 
import delimited "tab_st0.txt", clear

/*structure of the txt files
rows are varieties used in the test of Section 5 (those accouting for 90% of trade)
indx, indm, indt are dummies indicating if the variety is an export price, import price, or tariff revenue, respectively
ww_dw is the welfare weight of that variety
the other variables are structured as follows 

*prefix 
dy is the observed change in that outcomes
dx is the model prediction 
dyt = dy - dx
zn is the naive IV
zw is the preferred IV

*middle part
_mc: variable residualized of baseline control set (indm, indx indm and mean exposure)
_wmc: same as _mc but multiplied by weights specified in the paper

*/

*Save dta file
gen ind = 1 if indm == 1
replace ind = 2 if indt == 1 
replace ind = 3 if indx == 1 
	
save data_estimation.dta, replace

*Create binscatter for import prices (s=1), import tariff rev (s=2), export prices (s=3)
*********************************************************************************************
foreach s of numlist 1/3 {
	use data_estimation.dta, clear
	
	*Keep observations associated with a particular outocme type
	keep if ind == `s'
	
	*Define IV: baseline is preferred IV (welfare weighted and with baseline controls mc)
	local z zw_wmc_dw 
	
	*Normalize IV so that fit line is equal to the test statistic in estimation results (tab outcomes)
	qui sum `z'
	gen adj_`z' = `z'/(`r(sd)'^2)
	
	*Create bincatter plot for observed change dy, model prediction dx and dyt = dy - dx
	foreach v of varlist dy_mc_dw dx_mc_dw dyt_mc_dw {
		*multiple import prices by -1 to account for negative weight
		if `s' == 1 {
			replace `v' = - `v'
		}
		
		*save slope 
		reg `v' adj_`z'
		local b_`v' = _b[adj_`z']
		
		*create bnned data
		qui binscatter `v' adj_`z', n(20) savedata(binned_`v') replace
	}
 
	*Import binned data
	foreach v of varlist dy_mc_dw dx_mc_dw dyt_mc_dw {
		clear
		do binned_`v'
		save binned_`v'.dta, replace
		erase binned_`v'.csv
		erase binned_`v'.do
	}
	
	*Append binned data for different dep var 
	use binned_dy_mc_dw.dta, clear
	append using binned_dx_mc_dw , gen(ind)	
	reshape wide dy_mc_dw dx_mc_dw, i(adj_`z') j(ind)
	drop dy_mc_dw1 dx_mc_dw0
	rename (dx_mc_dw1 dy_mc_dw0) (dx_mc_dw dy_mc_dw)

	append using binned_dyt_mc_dw, gen(ind)
	reshape wide dy_mc_dw dx_mc_dw dyt_mc_dw, i(adj_`z') j(ind)
	drop dyt_mc_dw0 dx_mc_dw1 dy_mc_dw1
	rename (dx_mc_dw0 dy_mc_dw0 dyt_mc_dw1) (dx_mc_dw dy_mc_dw dyt_mc_dw)
	
	*Create fit line
	gen fit_dy_mc_dw  = `b_dy_mc_dw' *adj_`z'
	gen fit_dx_mc_dw  = `b_dx_mc_dw' *adj_`z'
	gen fit_dyt_mc_dw = `b_dyt_mc_dw'*adj_`z'
	
	cap sum adj_`z'
	
	*Create plot titles based on outcome type
	if `s' == 1 {
		local title_label "Import Prices"
		local xmin = r(min)
		local xmax = r(max)
		local panel "b" 
	}
	if `s' == 2 {
		local title_label "Tariff Revenue"
		local xmin = r(min)
		local xmax = r(max)		
		local panel "c" 
	}	
	if `s' == 3 {
		local title_label "Export Prices"
		local panel "a" 
	}
	
	*Create binnscatter plots
	
	if `s' != 3 {
		
		*Plot projection of observed change (dy) and model prediction (dx) onto IV
		twoway (scatter dy_mc_dw adj_`z'  , mc(black) m(D)) ///
			   (scatter dx_mc_dw adj_`z' , mc(gray) m(S) ) ///
			   (lfit    fit_dy_mc_dw adj_`z' , lp(dash) lc(black) ) ///
			   (lfit    fit_dx_mc_dw adj_`z' , lp(dash) lc(gray) ), ///
			   xtitle("preferred IV") xsc(r(`xmin' `xmax')) xlabel(`xmin'(0.05)`xmax', format(%3.2f)) ///
			   legend(order (1 2) label (1 "{&Delta} y") label (2 "{&Delta} x") ring(0) pos(10) ) ///
			   plotregion(fcolor(white)) graphregion(fcolor(white)) ///
			   name(plot1_spec`s', replace)
			   
	}	
	
	if `s' == 3 {
	
		*Plot 1: overlay projection of observed change (dy) and model prediction (dx) onto IV
		twoway (scatter dy_mc_dw adj_`z'  , mc(black) m(D)) ///
			   (scatter dx_mc_dw adj_`z' , mc(gray) m(S) ) ///
			   (lfit    fit_dy_mc_dw adj_`z' , lp(dash) lc(black) ) ///
			   (lfit    fit_dx_mc_dw adj_`z' , lp(dash) lc(gray) ), ///
				xtitle("preferred IV")  /// 			   		   
			   legend(order (1 2) label (1 "{&Delta} y") label (2 "{&Delta} x") ring(0) pos(10) ) ///
			   plotregion(fcolor(white)) graphregion(fcolor(white)) ///
			   name(plot1_spec`s', replace)

	}
	
	*Save plots	
	graph export "$graph_path\Fig_7_`panel'.png", as(png) name("plot1_spec`s'") replace
	graph export "$graph_path\Fig_7_`panel'.eps", as(eps) name("plot1_spec`s'") replace

	*erase auxiliary files
	erase binned_dx_mc_dw.dta
	erase binned_dy_mc_dw.dta
	erase binned_dyt_mc_dw.dta
	
}

*erase auxiliary files
erase data_estimation.dta
